{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "view-in-github"
},
"source": [
"
"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "k-UobhDUMAld"
},
"source": [
"# 17: Pandas (the Basics) and Titanic Analysis"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "rxagEuvWL6t0"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "AaNWHSg7NyMf"
},
"source": [
"## 1. Load your data into a Pandas DataFrame\n",
"\n",
"https://www.youtube.com/redirect?q=http%3A%2F%2Fbiostat.mc.vanderbilt.edu%2Fwiki%2Fpub%2FMain%2FDataSets%2Ftitanic3.xls&redir_token=IS7fnKxJQSAQBgyL_W_n-Yg2XZJ8MTU4NzkxOTk0MkAxNTg3ODMzNTQy&v=zZkNOdBWgFQ&event=video_description\n",
"\n",
"\n",
"The most common options:\n",
"- read_csv\n",
"- read_excel\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "PWA6lBBPMIu2"
},
"outputs": [],
"source": [
"data = pd.read_excel('../data/titanic3 (3).xls')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"colab_type": "code",
"id": "hAOrZK3mOG91",
"outputId": "c3cd9590-b1c1-4c08-86f2-f762d26c55ff"
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" pclass | \n",
" survived | \n",
" name | \n",
" sex | \n",
" age | \n",
" sibsp | \n",
" parch | \n",
" ticket | \n",
" fare | \n",
" cabin | \n",
" embarked | \n",
" boat | \n",
" body | \n",
" home.dest | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" Allen, Miss. Elisabeth Walton | \n",
" female | \n",
" 29.0000 | \n",
" 0 | \n",
" 0 | \n",
" 24160 | \n",
" 211.3375 | \n",
" B5 | \n",
" S | \n",
" 2 | \n",
" NaN | \n",
" St Louis, MO | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" Allison, Master. Hudson Trevor | \n",
" male | \n",
" 0.9167 | \n",
" 1 | \n",
" 2 | \n",
" 113781 | \n",
" 151.5500 | \n",
" C22 C26 | \n",
" S | \n",
" 11 | \n",
" NaN | \n",
" Montreal, PQ / Chesterville, ON | \n",
"
\n",
" \n",
" 2 | \n",
" 1 | \n",
" 0 | \n",
" Allison, Miss. Helen Loraine | \n",
" female | \n",
" 2.0000 | \n",
" 1 | \n",
" 2 | \n",
" 113781 | \n",
" 151.5500 | \n",
" C22 C26 | \n",
" S | \n",
" NaN | \n",
" NaN | \n",
" Montreal, PQ / Chesterville, ON | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" 0 | \n",
" Allison, Mr. Hudson Joshua Creighton | \n",
" male | \n",
" 30.0000 | \n",
" 1 | \n",
" 2 | \n",
" 113781 | \n",
" 151.5500 | \n",
" C22 C26 | \n",
" S | \n",
" NaN | \n",
" 135.0 | \n",
" Montreal, PQ / Chesterville, ON | \n",
"
\n",
" \n",
" 4 | \n",
" 1 | \n",
" 0 | \n",
" Allison, Mrs. Hudson J C (Bessie Waldo Daniels) | \n",
" female | \n",
" 25.0000 | \n",
" 1 | \n",
" 2 | \n",
" 113781 | \n",
" 151.5500 | \n",
" C22 C26 | \n",
" S | \n",
" NaN | \n",
" NaN | \n",
" Montreal, PQ / Chesterville, ON | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" pclass survived name sex \\\n",
"0 1 1 Allen, Miss. Elisabeth Walton female \n",
"1 1 1 Allison, Master. Hudson Trevor male \n",
"2 1 0 Allison, Miss. Helen Loraine female \n",
"3 1 0 Allison, Mr. Hudson Joshua Creighton male \n",
"4 1 0 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female \n",
"\n",
" age sibsp parch ticket fare cabin embarked boat body \\\n",
"0 29.0000 0 0 24160 211.3375 B5 S 2 NaN \n",
"1 0.9167 1 2 113781 151.5500 C22 C26 S 11 NaN \n",
"2 2.0000 1 2 113781 151.5500 C22 C26 S NaN NaN \n",
"3 30.0000 1 2 113781 151.5500 C22 C26 S NaN 135.0 \n",
"4 25.0000 1 2 113781 151.5500 C22 C26 S NaN NaN \n",
"\n",
" home.dest \n",
"0 St Louis, MO \n",
"1 Montreal, PQ / Chesterville, ON \n",
"2 Montreal, PQ / Chesterville, ON \n",
"3 Montreal, PQ / Chesterville, ON \n",
"4 Montreal, PQ / Chesterville, ON "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.shape\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 297
},
"colab_type": "code",
"id": "1NyIjgK6PGzb",
"outputId": "b0e851ea-a128-4b91-81a7-ee5b1a91c6fe"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" pclass | \n",
" survived | \n",
" age | \n",
" sibsp | \n",
" parch | \n",
" fare | \n",
" body | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 1309.000000 | \n",
" 1309.000000 | \n",
" 1046.000000 | \n",
" 1309.000000 | \n",
" 1309.000000 | \n",
" 1308.000000 | \n",
" 121.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 2.294882 | \n",
" 0.381971 | \n",
" 29.881135 | \n",
" 0.498854 | \n",
" 0.385027 | \n",
" 33.295479 | \n",
" 160.809917 | \n",
"
\n",
" \n",
" std | \n",
" 0.837836 | \n",
" 0.486055 | \n",
" 14.413500 | \n",
" 1.041658 | \n",
" 0.865560 | \n",
" 51.758668 | \n",
" 97.696922 | \n",
"
\n",
" \n",
" min | \n",
" 1.000000 | \n",
" 0.000000 | \n",
" 0.166700 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 2.000000 | \n",
" 0.000000 | \n",
" 21.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 7.895800 | \n",
" 72.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 3.000000 | \n",
" 0.000000 | \n",
" 28.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 14.454200 | \n",
" 155.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 3.000000 | \n",
" 1.000000 | \n",
" 39.000000 | \n",
" 1.000000 | \n",
" 0.000000 | \n",
" 31.275000 | \n",
" 256.000000 | \n",
"
\n",
" \n",
" max | \n",
" 3.000000 | \n",
" 1.000000 | \n",
" 80.000000 | \n",
" 8.000000 | \n",
" 9.000000 | \n",
" 512.329200 | \n",
" 328.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" pclass survived age sibsp parch \\\n",
"count 1309.000000 1309.000000 1046.000000 1309.000000 1309.000000 \n",
"mean 2.294882 0.381971 29.881135 0.498854 0.385027 \n",
"std 0.837836 0.486055 14.413500 1.041658 0.865560 \n",
"min 1.000000 0.000000 0.166700 0.000000 0.000000 \n",
"25% 2.000000 0.000000 21.000000 0.000000 0.000000 \n",
"50% 3.000000 0.000000 28.000000 0.000000 0.000000 \n",
"75% 3.000000 1.000000 39.000000 1.000000 0.000000 \n",
"max 3.000000 1.000000 80.000000 8.000000 9.000000 \n",
"\n",
" fare body \n",
"count 1308.000000 121.000000 \n",
"mean 33.295479 160.809917 \n",
"std 51.758668 97.696922 \n",
"min 0.000000 1.000000 \n",
"25% 7.895800 72.000000 \n",
"50% 14.454200 155.000000 \n",
"75% 31.275000 256.000000 \n",
"max 512.329200 328.000000 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "-XGnMGyMO619"
},
"source": [
"## 2. Clean up your dataset with drop(), dropna() and fillna()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "0NrOS6LGOVpc"
},
"outputs": [],
"source": [
"data = data.drop(['name', 'sibsp', 'parch', 'ticket', 'fare', 'cabin', 'embarked', 'boat', 'body', 'home.dest'], axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"id": "HZxs46tQOciZ",
"outputId": "cc83d17a-ee59-41b6-e8ce-e9583189f048"
},
"outputs": [
{
"data": {
"text/plain": [
"(1046, 4)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = data.dropna(axis=0)\n",
"data.shape"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 282
},
"colab_type": "code",
"id": "qUjENeV1Od7M",
"outputId": "113f6799-f39a-4b3f-cee1-c10b881796f7"
},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAmlElEQVR4nO3df3DU9YH/8dcm2SwESNKA+VWTEGk1ID/kwMC2noclJIQM1srcSKUWlYORS3pqegpYgSBnY2nHtjpUxjsLdyMprTeCBSkQg4bjDCCpFIMeAqVihSStTBIgZVmy7+8ffrPXNShs3L1978fnY2YH9vN5573vl5/N8vKzv1zGGCMAAACLJMR6AQAAAB9HQQEAANahoAAAAOtQUAAAgHUoKAAAwDoUFAAAYB0KCgAAsA4FBQAAWCcp1gvoj0AgoJMnT2rIkCFyuVyxXg4AALgCxhidOXNGubm5Skj49HMkcVlQTp48qby8vFgvAwAA9MP777+vq6+++lPHxGVBGTJkiKSPAqampkZsXr/frx07dqi0tFRutzti89rC6fkk52d0ej7J+Rmdnk9yfkan55Oil7Grq0t5eXnBf8c/TVwWlN6ndVJTUyNeUFJSUpSamurIO53T80nOz+j0fJLzMzo9n+T8jE7PJ0U/45W8PIMXyQIAAOtQUAAAgHUoKAAAwDoUFAAAYB0KCgAAsA4FBQAAWIeCAgAArENBAQAA1qGgAAAA61BQAACAdSgoAADAOhQUAABgHQoKAACwDgUFAABYJynWC8Dn1/DFL0d8Tk+i0apiaXTNdvl6Lv913uH6wxMVEZ8TANAXZ1AAAIB1KCgAAMA6FBQAAGAdCgoAALAOBQUAAFiHggIAAKxDQQEAANahoAAAAOtQUAAAgHUoKAAAwDoUFAAAYB0KCgAAsA4FBQAAWIeCAgAArENBAQAA1qGgAAAA61BQAACAdSgoAADAOhQUAABgHQoKAACwDgUFAABYh4ICAACsQ0EBAADWoaAAAADrUFAAAIB1KCgAAMA6YRWU2tpa3XjjjRoyZIgyMzN122236fDhwyFjpkyZIpfLFXK57777QsacOHFCFRUVSklJUWZmph566CFdvHjxs6cBAACOkBTO4MbGRlVWVurGG2/UxYsX9cgjj6i0tFRvv/22Bg0aFBw3f/58PfbYY8HrKSkpwb/39PSooqJC2dnZev3113Xq1Cl9+9vfltvt1ve///0IRAIAAPEurIKybdu2kOvr1q1TZmammpubdfPNNwe3p6SkKDs7+5Jz7NixQ2+//bZeeeUVZWVl6YYbbtDKlSu1aNEi1dTUKDk5uR8xAACAk4RVUD6us7NTkpSRkRGyff369Xr++eeVnZ2tmTNnaunSpcGzKE1NTRozZoyysrKC48vKyrRw4UIdOnRI48eP73M7Pp9PPp8veL2rq0uS5Pf75ff7P0uEEL1zRXJOm9iWz5NoIj9nggn5M9Ji/d/OtmMYDU7P6PR8kvMzOj2fFL2M4cznMsb065E8EAjo1ltvVUdHh3bv3h3c/uyzz6qgoEC5ubk6ePCgFi1apOLiYr344ouSpAULFui9997T9u3bgz/T3d2tQYMGaevWrSovL+9zWzU1NVqxYkWf7XV1dSFPHwEAAHt1d3frzjvvVGdnp1JTUz91bL/PoFRWVqqlpSWknEgfFZBeY8aMUU5OjqZOnapjx45pxIgR/bqtJUuWqLq6Oni9q6tLeXl5Ki0tvWzAcPj9ftXX12vatGlyu90Rm9cWtuUbXbP98oPC5EkwWjkxoKX7E+QLuCI+f6z1J19LTVmUVxVZtt1PI83p+STnZ3R6Pil6GXufAbkS/SooVVVV2rJli3bt2qWrr776U8dOmjRJknT06FGNGDFC2dnZ2rdvX8iYtrY2SfrE1614PB55PJ4+291ud1TuHNGa1xa25PP1RK9A+AKuqM4fa+Hks+FY94ct99NocXo+yfkZnZ5PinzGcOYK623GxhhVVVVp48aN2rlzpwoLCy/7MwcOHJAk5eTkSJK8Xq/eeusttbe3B8fU19crNTVVo0aNCmc5AADAocI6g1JZWam6ujq99NJLGjJkiFpbWyVJaWlpGjhwoI4dO6a6ujrNmDFDQ4cO1cGDB/Xggw/q5ptv1tixYyVJpaWlGjVqlO666y6tWrVKra2tevTRR1VZWXnJsyQAAODzJ6wzKM8884w6Ozs1ZcoU5eTkBC+//OUvJUnJycl65ZVXVFpaqqKiIn33u9/VrFmztHnz5uAciYmJ2rJlixITE+X1evWtb31L3/72t0M+NwUAAHy+hXUG5XJv+MnLy1NjY+Nl5ykoKNDWrVvDuWkAAPA5wnfxAAAA61BQAACAdSgoAADAOhQUAABgHQoKAACwDgUFAABYh4ICAACsQ0EBAADWoaAAAADrUFAAAIB1KCgAAMA6FBQAAGAdCgoAALAOBQUAAFiHggIAAKxDQQEAANahoAAAAOtQUAAAgHUoKAAAwDoUFAAAYB0KCgAAsA4FBQAAWIeCAgAArENBAQAA1qGgAAAA61BQAACAdSgoAADAOhQUAABgHQoKAACwDgUFAABYh4ICAACsQ0EBAADWoaAAAADrUFAAAIB1KCgAAMA6FBQAAGAdCgoAALAOBQUAAFiHggIAAKxDQQEAANahoAAAAOtQUAAAgHUoKAAAwDoUFAAAYB0KCgAAsA4FBQAAWIeCAgAArENBAQAA1qGgAAAA61BQAACAdSgoAADAOhQUAABgHQoKAACwTlgFpba2VjfeeKOGDBmizMxM3XbbbTp8+HDImPPnz6uyslJDhw7V4MGDNWvWLLW1tYWMOXHihCoqKpSSkqLMzEw99NBDunjx4mdPAwAAHCGsgtLY2KjKykrt2bNH9fX18vv9Ki0t1blz54JjHnzwQW3evFkvvPCCGhsbdfLkSd1+++3B/T09PaqoqNCFCxf0+uuv69///d+1bt06LVu2LHKpAABAXEsKZ/C2bdtCrq9bt06ZmZlqbm7WzTffrM7OTj333HOqq6vT1772NUnS2rVrNXLkSO3Zs0eTJ0/Wjh079Pbbb+uVV15RVlaWbrjhBq1cuVKLFi1STU2NkpOTI5cOAADEpbAKysd1dnZKkjIyMiRJzc3N8vv9KikpCY4pKipSfn6+mpqaNHnyZDU1NWnMmDHKysoKjikrK9PChQt16NAhjR8/vs/t+Hw++Xy+4PWuri5Jkt/vl9/v/ywRQvTOFck5bWJbPk+iifycCSbkT6fpTz5bjveVsu1+GmlOzyc5P6PT80nRyxjOfC5jTL8eyQOBgG699VZ1dHRo9+7dkqS6ujrdc889IWVCkoqLi3XLLbfoBz/4gRYsWKD33ntP27dvD+7v7u7WoEGDtHXrVpWXl/e5rZqaGq1YsaLP9rq6OqWkpPRn+QAA4P9Yd3e37rzzTnV2dio1NfVTx/b7DEplZaVaWlqC5SSalixZourq6uD1rq4u5eXlqbS09LIBw+H3+1VfX69p06bJ7XZHbF5b2JZvdM32yw8KkyfBaOXEgJbuT5Av4Ir4/LHWn3wtNWVRXlVk2XY/jTSn55Ocn9Hp+aToZex9BuRK9KugVFVVacuWLdq1a5euvvrq4Pbs7GxduHBBHR0dSk9PD25va2tTdnZ2cMy+fftC5ut9l0/vmI/zeDzyeDx9trvd7qjcOaI1ry1syefriV6B8AVcUZ0/1sLJZ8Ox7g9b7qfR4vR8kvMzOj2fFPmM4cwV1rt4jDGqqqrSxo0btXPnThUWFobsnzBhgtxutxoaGoLbDh8+rBMnTsjr9UqSvF6v3nrrLbW3twfH1NfXKzU1VaNGjQpnOQAAwKHCOoNSWVmpuro6vfTSSxoyZIhaW1slSWlpaRo4cKDS0tI0b948VVdXKyMjQ6mpqfrOd74jr9eryZMnS5JKS0s1atQo3XXXXVq1apVaW1v16KOPqrKy8pJnSQAAwOdPWAXlmWeekSRNmTIlZPvatWt19913S5J+/OMfKyEhQbNmzZLP51NZWZl+9rOfBccmJiZqy5YtWrhwobxerwYNGqS5c+fqscce+2xJAACAY4RVUK7kDT8DBgzQ6tWrtXr16k8cU1BQoK1bt4Zz0wAA4HOE7+IBAADWoaAAAADrUFAAAIB1KCgAAMA6FBQAAGAdCgoAALAOBQUAAFiHggIAAKxDQQEAANahoAAAAOtQUAAAgHUoKAAAwDoUFAAAYB0KCgAAsA4FBQAAWIeCAgAArENBAQAA1qGgAAAA61BQAACAdSgoAADAOhQUAABgHQoKAACwDgUFAABYh4ICAACsQ0EBAADWoaAAAADrUFAAAIB1KCgAAMA6FBQAAGAdCgoAALAOBQUAAFiHggIAAKxDQQEAANahoAAAAOtQUAAAgHUoKAAAwDoUFAAAYB0KCgAAsA4FBQAAWIeCAgAArENBAQAA1qGgAAAA61BQAACAdZJivQAA0TV88cuxXkJYPIlGq4pjvQoAscYZFAAAYB0KCgAAsA4FBQAAWIeCAgAArENBAQAA1qGgAAAA61BQAACAdSgoAADAOhQUAABgnbALyq5duzRz5kzl5ubK5XJp06ZNIfvvvvtuuVyukMv06dNDxpw+fVpz5sxRamqq0tPTNW/ePJ09e/YzBQEAAM4RdkE5d+6cxo0bp9WrV3/imOnTp+vUqVPByy9+8YuQ/XPmzNGhQ4dUX1+vLVu2aNeuXVqwYEH4qwcAAI4U9nfxlJeXq7y8/FPHeDweZWdnX3LfO++8o23btumNN97QxIkTJUlPP/20ZsyYoR/96EfKzc0Nd0kAAMBhovIalNdee02ZmZm67rrrtHDhQn344YfBfU1NTUpPTw+WE0kqKSlRQkKC9u7dG43lAACAOBPxbzOePn26br/9dhUWFurYsWN65JFHVF5erqamJiUmJqq1tVWZmZmhi0hKUkZGhlpbWy85p8/nk8/nC17v6uqSJPn9fvn9/oitvXeuSM5pE9vyeRJN5OdMMCF/Oo3T80n/m82W+2mk2fZ7GA1Oz+j0fFL0MoYzX8QLyuzZs4N/HzNmjMaOHasRI0botdde09SpU/s1Z21trVasWNFn+44dO5SSktLvtX6S+vr6iM9pE1vyrSqO3twrJwaiN7kFnJ5Psud+Gi1Ozyc5P6PT80mRz9jd3X3FYyNeUD7ummuu0bBhw3T06FFNnTpV2dnZam9vDxlz8eJFnT59+hNft7JkyRJVV1cHr3d1dSkvL0+lpaVKTU2N2Fr9fr/q6+s1bdo0ud3uiM1rC9vyja7ZHvE5PQlGKycGtHR/gnwBV8TnjzWn55P+N6Mt99NIs+33MBqcntHp+aToZex9BuRKRL2g/PGPf9SHH36onJwcSZLX61VHR4eam5s1YcIESdLOnTsVCAQ0adKkS87h8Xjk8Xj6bHe73VG5c0RrXlvYks/XE71/YH0BV1TnjzWn55PsuZ9Gi9PzSc7P6PR8UuQzhjNX2AXl7NmzOnr0aPD68ePHdeDAAWVkZCgjI0MrVqzQrFmzlJ2drWPHjunhhx/Wl770JZWVlUmSRo4cqenTp2v+/Plas2aN/H6/qqqqNHv2bN7BAwAAJPXjXTz79+/X+PHjNX78eElSdXW1xo8fr2XLlikxMVEHDx7UrbfeqmuvvVbz5s3ThAkT9F//9V8hZ0DWr1+voqIiTZ06VTNmzNBNN92kZ599NnKpAABAXAv7DMqUKVNkzCe/g2D79su/riAjI0N1dXXh3jQAAPic4Lt4AACAdSgoAADAOhQUAABgHQoKAACwDgUFAABYh4ICAACsQ0EBAADWoaAAAADrUFAAAIB1KCgAAMA6FBQAAGAdCgoAALAOBQUAAFiHggIAAKxDQQEAANahoAAAAOtQUAAAgHUoKAAAwDoUFAAAYB0KCgAAsA4FBQAAWIeCAgAArENBAQAA1qGgAAAA61BQAACAdSgoAADAOhQUAABgHQoKAACwDgUFAABYh4ICAACsQ0EBAADWoaAAAADrUFAAAIB1KCgAAMA6FBQAAGAdCgoAALAOBQUAAFiHggIAAKxDQQEAANahoAAAAOtQUAAAgHUoKAAAwDoUFAAAYB0KCgAAsA4FBQAAWIeCAgAArENBAQAA1qGgAAAA6yTFegEAcCmja7bL1+OK9TKu2B+eqIj1EgBH4QwKAACwDgUFAABYh4ICAACsQ0EBAADWoaAAAADrhF1Qdu3apZkzZyo3N1cul0ubNm0K2W+M0bJly5STk6OBAweqpKRER44cCRlz+vRpzZkzR6mpqUpPT9e8efN09uzZzxQEAAA4R9gF5dy5cxo3bpxWr159yf2rVq3SU089pTVr1mjv3r0aNGiQysrKdP78+eCYOXPm6NChQ6qvr9eWLVu0a9cuLViwoP8pAACAo4T9OSjl5eUqLy+/5D5jjH7yk5/o0Ucf1de//nVJ0n/8x38oKytLmzZt0uzZs/XOO+9o27ZteuONNzRx4kRJ0tNPP60ZM2boRz/6kXJzcz9DHAAA4AQR/aC248ePq7W1VSUlJcFtaWlpmjRpkpqamjR79mw1NTUpPT09WE4kqaSkRAkJCdq7d6++8Y1v9JnX5/PJ5/MFr3d1dUmS/H6//H5/xNbfO1ck57SJbfk8iSbycyaYkD+dxun5pPjNeKW/V7b9HkaD0zM6PZ8UvYzhzBfRgtLa2ipJysrKCtmelZUV3Nfa2qrMzMzQRSQlKSMjIzjm42pra7VixYo+23fs2KGUlJRILD1EfX19xOe0iS35VhVHb+6VEwPRm9wCTs8nxV/GrVu3hjXelt/DaHJ6RqfnkyKfsbu7+4rHxsVH3S9ZskTV1dXB611dXcrLy1NpaalSU1Mjdjt+v1/19fWaNm2a3G53xOa1hW35Rtdsj/icngSjlRMDWro/Qb5A/HxM+pVyej4pfjO21JRd0Tjbfg+jwekZnZ5Pil7G3mdArkREC0p2drYkqa2tTTk5OcHtbW1tuuGGG4Jj2tvbQ37u4sWLOn36dPDnP87j8cjj8fTZ7na7o3LniNa8trAlXzS/Z8UXcMXV97iEy+n5pPjLGO7vlC2/h9Hk9IxOzydFPmM4c0X0c1AKCwuVnZ2thoaG4Lauri7t3btXXq9XkuT1etXR0aHm5ubgmJ07dyoQCGjSpEmRXA4AAIhTYZ9BOXv2rI4ePRq8fvz4cR04cEAZGRnKz8/XAw88oH/5l3/Rl7/8ZRUWFmrp0qXKzc3VbbfdJkkaOXKkpk+frvnz52vNmjXy+/2qqqrS7NmzeQcPAACQ1I+Csn//ft1yyy3B672vDZk7d67WrVunhx9+WOfOndOCBQvU0dGhm266Sdu2bdOAAQOCP7N+/XpVVVVp6tSpSkhI0KxZs/TUU09FIA4AAHCCsAvKlClTZMwnv/3P5XLpscce02OPPfaJYzIyMlRXVxfuTQMAgM8JvosHAABYh4ICAACsQ0EBAADWoaAAAADrUFAAAIB1KCgAAMA6FBQAAGAdCgoAALAOBQUAAFiHggIAAKxDQQEAANahoAAAAOtQUAAAgHUoKAAAwDoUFAAAYB0KCgAAsA4FBQAAWIeCAgAArENBAQAA1qGgAAAA61BQAACAdSgoAADAOhQUAABgHQoKAACwDgUFAABYh4ICAACsQ0EBAADWoaAAAADrUFAAAIB1kmK9ABuNrtkuX48r1ssIyx+eqIj1EgAAiBjOoAAAAOtQUAAAgHUoKAAAwDoUFAAAYB0KCgAAsA4FBQAAWIeCAgAArENBAQAA1uGD2gAgAoYvfvmKxnkSjVYV2/GBkHzAI2zGGRQAAGAdCgoAALAOBQUAAFiH16A4xJU8/23Tc98AAHwazqAAAADrUFAAAIB1KCgAAMA6FBQAAGAdCgoAALAOBQUAAFiHggIAAKxDQQEAANahoAAAAOtQUAAAgHUiXlBqamrkcrlCLkVFRcH958+fV2VlpYYOHarBgwdr1qxZamtri/QyAABAHIvKGZTrr79ep06dCl52794d3Pfggw9q8+bNeuGFF9TY2KiTJ0/q9ttvj8YyAABAnIrKlwUmJSUpOzu7z/bOzk4999xzqqur09e+9jVJ0tq1azVy5Ejt2bNHkydPjsZyAABAnIlKQTly5Ihyc3M1YMAAeb1e1dbWKj8/X83NzfL7/SopKQmOLSoqUn5+vpqamj6xoPh8Pvl8vuD1rq4uSZLf75ff74/Yunvn8iSYiM1pk95cTs0nOT+j0/NJzs9oU75IPn5eat5ozR9rTs8nRS9jOPO5jDER/S35zW9+o7Nnz+q6667TqVOntGLFCn3wwQdqaWnR5s2bdc8994SUDUkqLi7WLbfcoh/84AeXnLOmpkYrVqzos72urk4pKSmRXD4AAIiS7u5u3Xnnners7FRqauqnjo14Qfm4jo4OFRQU6Mknn9TAgQP7VVAudQYlLy9Pf/7zny8bMBx+v1/19fVauj9BvoArYvPawpNgtHJiwLH5JOdndHo+yfkZbcrXUlMWlXl7H0unTZsmt9sdlduIJafnk6KXsaurS8OGDbuighKVp3j+Wnp6uq699lodPXpU06ZN04ULF9TR0aH09PTgmLa2tku+ZqWXx+ORx+Pps93tdkflzuELuOTrcd4DYy+n55Ocn9Hp+STnZ7QhX7T/cY3WY7QtnJ5PinzGcOaK+uegnD17VseOHVNOTo4mTJggt9uthoaG4P7Dhw/rxIkT8nq90V4KAACIExE/g/LP//zPmjlzpgoKCnTy5EktX75ciYmJ+uY3v6m0tDTNmzdP1dXVysjIUGpqqr7zne/I6/XyDh4AABAU8YLyxz/+Ud/85jf14Ycf6qqrrtJNN92kPXv26KqrrpIk/fjHP1ZCQoJmzZoln8+nsrIy/exnP4v0MgAAQByLeEHZsGHDp+4fMGCAVq9erdWrV0f6pgEAgEPwXTwAAMA6FBQAAGAdCgoAALAOBQUAAFiHggIAAKwT9U+SBQDYafjil6MyryfRaFWxNLpme8Q/LfcPT1REdD7YizMoAADAOhQUAABgHQoKAACwDgUFAABYh4ICAACsQ0EBAADWoaAAAADrUFAAAIB1KCgAAMA6FBQAAGAdCgoAALAOBQUAAFiHggIAAKxDQQEAANahoAAAAOtQUAAAgHUoKAAAwDoUFAAAYB0KCgAAsA4FBQAAWIeCAgAArENBAQAA1qGgAAAA61BQAACAdSgoAADAOhQUAABgHQoKAACwDgUFAABYh4ICAACsQ0EBAADWoaAAAADrUFAAAIB1KCgAAMA6FBQAAGAdCgoAALAOBQUAAFiHggIAAKxDQQEAANZJivUCAAC4UsMXvxzrJciTaLSqWBpds12+Htdlx//hiYr/g1U5D2dQAACAdSgoAADAOhQUAABgHQoKAACwDgUFAABYh4ICAACsQ0EBAADWoaAAAADrxPSD2lavXq0f/vCHam1t1bhx4/T000+ruLg4lksCACCibPhwuXD1fhhdLMXsDMovf/lLVVdXa/ny5frtb3+rcePGqaysTO3t7bFaEgAAsETMCsqTTz6p+fPn65577tGoUaO0Zs0apaSk6Oc//3mslgQAACwRk6d4Lly4oObmZi1ZsiS4LSEhQSUlJWpqauoz3ufzyefzBa93dnZKkk6fPi2/3x+xdfn9fnV3dyvJn6CewOW/XyHeJAWMursDjs0nOT+j0/NJzs/o9HyS8zM6PZ/0vxk//PBDud3uiM175swZSZIx5vKDTQx88MEHRpJ5/fXXQ7Y/9NBDpri4uM/45cuXG0lcuHDhwoULFwdc3n///ct2hbj4NuMlS5aouro6eD0QCOj06dMaOnSoXK7Itdeuri7l5eXp/fffV2pqasTmtYXT80nOz+j0fJLzMzo9n+T8jE7PJ0UvozFGZ86cUW5u7mXHxqSgDBs2TImJiWprawvZ3tbWpuzs7D7jPR6PPB5PyLb09PSorS81NdWxdzrJ+fkk52d0ej7J+Rmdnk9yfkan55OikzEtLe2KxsXkRbLJycmaMGGCGhoagtsCgYAaGhrk9XpjsSQAAGCRmD3FU11drblz52rixIkqLi7WT37yE507d0733HNPrJYEAAAsEbOCcscdd+hPf/qTli1bptbWVt1www3atm2bsrKyYrUkeTweLV++vM/TSU7h9HyS8zM6PZ/k/IxOzyc5P6PT80l2ZHQZcyXv9QEAAPi/w3fxAAAA61BQAACAdSgoAADAOhQUAABgHQrK/7d69WoNHz5cAwYM0KRJk7Rv375YL6nfdu3apZkzZyo3N1cul0ubNm0K2W+M0bJly5STk6OBAweqpKRER44cic1i+6G2tlY33nijhgwZoszMTN122206fPhwyJjz58+rsrJSQ4cO1eDBgzVr1qw+Hwxoq2eeeUZjx44NfkCS1+vVb37zm+D+eM72SZ544gm5XC498MADwW3xnLOmpkYulyvkUlRUFNwfz9n+2gcffKBvfetbGjp0qAYOHKgxY8Zo//79wf3x/FgzfPjwPsfQ5XKpsrJSkjOOYU9Pj5YuXarCwkINHDhQI0aM0MqVK0O+Jyemx/Czf7NO/NuwYYNJTk42P//5z82hQ4fM/PnzTXp6umlra4v10vpl69at5nvf+5558cUXjSSzcePGkP1PPPGESUtLM5s2bTK/+93vzK233moKCwvNX/7yl9gsOExlZWVm7dq1pqWlxRw4cMDMmDHD5Ofnm7NnzwbH3HfffSYvL880NDSY/fv3m8mTJ5uvfOUrMVz1lfv1r39tXn75ZfPuu++aw4cPm0ceecS43W7T0tJijInvbJeyb98+M3z4cDN27Fhz//33B7fHc87ly5eb66+/3pw6dSp4+dOf/hTcH8/Zep0+fdoUFBSYu+++2+zdu9f8/ve/N9u3bzdHjx4Njonnx5r29vaQ41dfX28kmVdffdUY44xj+Pjjj5uhQ4eaLVu2mOPHj5sXXnjBDB482Pz0pz8NjonlMaSgGGOKi4tNZWVl8HpPT4/Jzc01tbW1MVxVZHy8oAQCAZOdnW1++MMfBrd1dHQYj8djfvGLX8RghZ9de3u7kWQaGxuNMR/lcbvd5oUXXgiOeeedd4wk09TUFKtlfiZf+MIXzL/92785LtuZM2fMl7/8ZVNfX2/+7u/+LlhQ4j3n8uXLzbhx4y65L96z9Vq0aJG56aabPnG/0x5r7r//fjNixAgTCAQccwwrKirMvffeG7Lt9ttvN3PmzDHGxP4Yfu6f4rlw4YKam5tVUlIS3JaQkKCSkhI1NTXFcGXRcfz4cbW2tobkTUtL06RJk+I2b2dnpyQpIyNDktTc3Cy/3x+SsaioSPn5+XGXsaenRxs2bNC5c+fk9XodlU2SKisrVVFREZJHcsYxPHLkiHJzc3XNNddozpw5OnHihCRnZJOkX//615o4caL+/u//XpmZmRo/frz+9V//NbjfSY81Fy5c0PPPP697771XLpfLMcfwK1/5ihoaGvTuu+9Kkn73u99p9+7dKi8vlxT7YxgX32YcTX/+85/V09PT5xNss7Ky9D//8z8xWlX0tLa2StIl8/buiyeBQEAPPPCAvvrVr2r06NGSPsqYnJzc5wsl4ynjW2+9Ja/Xq/Pnz2vw4MHauHGjRo0apQMHDsR9tl4bNmzQb3/7W73xxht99sX7MZw0aZLWrVun6667TqdOndKKFSv0t3/7t2ppaYn7bL1+//vf65lnnlF1dbUeeeQRvfHGG/qnf/onJScna+7cuY56rNm0aZM6Ojp09913S4r/+2evxYsXq6urS0VFRUpMTFRPT48ef/xxzZkzR1Ls/7343BcUxLfKykq1tLRo9+7dsV5KRF133XU6cOCAOjs79Z//+Z+aO3euGhsbY72siHn//fd1//33q76+XgMGDIj1ciKu9/9AJWns2LGaNGmSCgoK9Ktf/UoDBw6M4coiJxAIaOLEifr+978vSRo/frxaWlq0Zs0azZ07N8ari6znnntO5eXlys3NjfVSIupXv/qV1q9fr7q6Ol1//fU6cOCAHnjgAeXm5lpxDD/3T/EMGzZMiYmJfV593dbWpuzs7BitKnp6Mzkhb1VVlbZs2aJXX31VV199dXB7dna2Lly4oI6OjpDx8ZQxOTlZX/rSlzRhwgTV1tZq3Lhx+ulPf+qIbNJHT3O0t7frb/7mb5SUlKSkpCQ1NjbqqaeeUlJSkrKyshyRs1d6erquvfZaHT161DHHMCcnR6NGjQrZNnLkyOBTWU55rHnvvff0yiuv6B/+4R+C25xyDB966CEtXrxYs2fP1pgxY3TXXXfpwQcfVG1traTYH8PPfUFJTk7WhAkT1NDQENwWCATU0NAgr9cbw5VFR2FhobKzs0PydnV1ae/evXGT1xijqqoqbdy4UTt37lRhYWHI/gkTJsjtdodkPHz4sE6cOBE3GT8uEAjI5/M5JtvUqVP11ltv6cCBA8HLxIkTNWfOnODfnZCz19mzZ3Xs2DHl5OQ45hh+9atf7fP2/nfffVcFBQWSnPFYI0lr165VZmamKioqgtuccgy7u7uVkBBaAxITExUIBCRZcAyj/jLcOLBhwwbj8XjMunXrzNtvv20WLFhg0tPTTWtra6yX1i9nzpwxb775pnnzzTeNJPPkk0+aN99807z33nvGmI/eNpaenm5eeuklc/DgQfP1r389bt76Z4wxCxcuNGlpaea1114LeRtgd3d3cMx9991n8vPzzc6dO83+/fuN1+s1Xq83hqu+cosXLzaNjY3m+PHj5uDBg2bx4sXG5XKZHTt2GGPiO9un+et38RgT3zm/+93vmtdee80cP37c/Pd//7cpKSkxw4YNM+3t7caY+M7Wa9++fSYpKck8/vjj5siRI2b9+vUmJSXFPP/888Ex8f5Y09PTY/Lz882iRYv67HPCMZw7d6754he/GHyb8YsvvmiGDRtmHn744eCYWB5DCsr/9/TTT5v8/HyTnJxsiouLzZ49e2K9pH579dVXjaQ+l7lz5xpjPnrr2NKlS01WVpbxeDxm6tSp5vDhw7FddBgulU2SWbt2bXDMX/7yF/OP//iP5gtf+IJJSUkx3/jGN8ypU6dit+gw3HvvvaagoMAkJyebq666ykydOjVYToyJ72yf5uMFJZ5z3nHHHSYnJ8ckJyebL37xi+aOO+4I+XyQeM721zZv3mxGjx5tPB6PKSoqMs8++2zI/nh/rNm+fbuRdMk1O+EYdnV1mfvvv9/k5+ebAQMGmGuuucZ873vfMz6fLzgmlsfQZcxffWQcAACABT73r0EBAAD2oaAAAADrUFAAAIB1KCgAAMA6FBQAAGAdCgoAALAOBQUAAFiHggIAAKxDQQEAANahoAAAAOtQUAAAgHUoKAAAwDr/DyGOMlug36YaAAAAAElFTkSuQmCC",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"data['age'].hist()"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "50a38XraOx_H"
},
"source": [
"## 3. Groupby() and value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 142
},
"colab_type": "code",
"id": "YzT4WdqIP3kC",
"outputId": "58666a52-07b7-49fd-f486-25ce969a2423"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" pclass | \n",
" survived | \n",
" age | \n",
"
\n",
" \n",
" sex | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" female | \n",
" 2.048969 | \n",
" 0.752577 | \n",
" 28.687071 | \n",
"
\n",
" \n",
" male | \n",
" 2.300912 | \n",
" 0.205167 | \n",
" 30.585233 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" pclass survived age\n",
"sex \n",
"female 2.048969 0.752577 28.687071\n",
"male 2.300912 0.205167 30.585233"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.groupby(['sex']).mean()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 266
},
"colab_type": "code",
"id": "DVtVErEbOiM-",
"outputId": "a558bdb2-9399-43f7-ad48-41cc87e4d162"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" survived | \n",
" age | \n",
"
\n",
" \n",
" sex | \n",
" pclass | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" female | \n",
" 1 | \n",
" 0.962406 | \n",
" 37.037594 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.893204 | \n",
" 27.499191 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.473684 | \n",
" 22.185307 | \n",
"
\n",
" \n",
" male | \n",
" 1 | \n",
" 0.350993 | \n",
" 41.029250 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.145570 | \n",
" 30.815401 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.169054 | \n",
" 25.962273 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" survived age\n",
"sex pclass \n",
"female 1 0.962406 37.037594\n",
" 2 0.893204 27.499191\n",
" 3 0.473684 22.185307\n",
"male 1 0.350993 41.029250\n",
" 2 0.145570 30.815401\n",
" 3 0.169054 25.962273"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.groupby(['sex', 'pclass']).mean()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 85
},
"colab_type": "code",
"id": "yfQpGJRfQGh6",
"outputId": "1c9ef555-5395-4fb1-aa69-8c1a21ab66e6"
},
"outputs": [
{
"data": {
"text/plain": [
"3 501\n",
"1 284\n",
"2 261\n",
"Name: pclass, dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['pclass'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 85
},
"colab_type": "code",
"id": "ebXdlFfCOmr3",
"outputId": "e043f673-665d-472f-c508-984a33813543"
},
"outputs": [
{
"data": {
"text/plain": [
"3 106\n",
"2 33\n",
"1 15\n",
"Name: pclass, dtype: int64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[data['age'] < 18]['pclass'].value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "9sXTWzZqYv2T"
},
"source": [
"## 4. Exercice\n",
"- Créer des catégories d'ages avec la fonction map() de pandas\n",
"- Créer des catégories de genres avec cat.codes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Solution"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "xB7KJby3YgeY",
"jupyter": {
"source_hidden": true
},
"tags": [
"hide-cell"
]
},
"outputs": [],
"source": [
"def category_ages(age):\n",
" if age <= 20:\n",
" return '<20 ans'\n",
" elif (age > 20) & (age <= 30):\n",
" return '20-30 ans'\n",
" elif (age > 30) & (age <= 40):\n",
" return '30-40 ans'\n",
" else:\n",
" return '+40 ans'"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "HRyqvMbPZAc5",
"jupyter": {
"source_hidden": true
},
"tags": [
"hide-cell"
]
},
"outputs": [],
"source": [
"data['age'] = data['age'].map(category_ages)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 221
},
"colab_type": "code",
"collapsed": true,
"id": "cOfqUmDeZCBH",
"jupyter": {
"outputs_hidden": true,
"source_hidden": true
},
"outputId": "02a1b0ab-df8f-492a-e4a8-1b5a13d75a07",
"tags": [
"hide-cell"
]
},
"outputs": [
{
"data": {
"text/plain": [
"0 0\n",
"1 1\n",
"2 0\n",
"3 1\n",
"4 0\n",
" ..\n",
"1301 1\n",
"1304 0\n",
"1306 1\n",
"1307 1\n",
"1308 1\n",
"Length: 1046, dtype: int8"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['sex'].astype('category').cat.codes"
]
}
],
"metadata": {
"colab": {
"authorship_tag": "ABX9TyNQ51crWhYH+NcDKYMuWfiE",
"include_colab_link": true,
"name": "Untitled13.ipynb",
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "sti",
"language": "python",
"name": "sti"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}